<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><link rel="stylesheet" type="text/css" href="insn.css"/><meta name="generator" content="iform.xsl"/><title>FMMLA</title></head><body><table style="margin: 0 auto;"><tr><td><div class="topbar"><a href="index.html">Base Instructions</a></div></td><td><div class="topbar"><a href="fpsimdindex.html">SIMD&amp;FP Instructions</a></div></td><td><div class="topbar"><a href="sveindex.html">SVE Instructions</a></div></td><td><div class="topbar"><a href="mortlachindex.html">SME Instructions</a></div></td><td><div class="topbar"><a href="encodingindex.html">Index by Encoding</a></div></td><td><div class="topbar"><a href="shared_pseudocode.html">Shared Pseudocode</a></div></td><td><div class="topbar"><a href="notice.html">Proprietary Notice</a></div></td></tr></table><hr/><h2 class="instruction-section">FMMLA</h2><p>Floating-point matrix multiply-accumulate</p>
      <p class="aml">The floating-point matrix multiply-accumulate instruction supports single-precision and double-precision data types in a 2×2 matrix contained in segments of 128 or 256 bits, respectively. It multiplies the 2×2 matrix in each segment of the first source vector by the 2×2 matrix in the corresponding segment of the second source vector. The resulting 2×2 matrix product is then destructively added to the matrix accumulator held in the corresponding segment of the addend and destination vector. This is equivalent to performing a 2-way dot product per destination element. This instruction is unpredicated. The single-precision variant is vector length agnostic. The double-precision variant requires that the current vector length is at least 256 bits, and if the current vector length is not an integer multiple of 256 bits then the trailing bits are set to zero.</p>
      <p class="aml">ID_AA64ZFR0_EL1.F32MM indicates whether the single-precision variant is implemented.</p>
      <p class="aml">ID_AA64ZFR0_EL1.F64MM indicates whether the double-precision variant is implemented.</p>
      <p class="aml">This instruction is illegal when executed in Streaming SVE mode, unless FEAT_SME_FA64 is implemented and enabled.</p>
    
    <p class="desc">
      It has encodings from 2 classes:
      <a href="#iclass_32_elem">32-bit element</a>
       and 
      <a href="#iclass_64_elem">64-bit element</a>
    </p>
    <h3 class="classheading"><a id="iclass_32_elem"/>32-bit element<span style="font-size:smaller;"><br/>(FEAT_F32MM)
          </span></h3><div class="regdiagram-32"><table class="regdiagram"><thead><tr><td>31</td><td>30</td><td>29</td><td>28</td><td>27</td><td>26</td><td>25</td><td>24</td><td>23</td><td>22</td><td>21</td><td>20</td><td>19</td><td>18</td><td>17</td><td>16</td><td>15</td><td>14</td><td>13</td><td>12</td><td>11</td><td>10</td><td>9</td><td>8</td><td>7</td><td>6</td><td>5</td><td>4</td><td>3</td><td>2</td><td>1</td><td>0</td></tr></thead><tbody><tr class="firstrow"><td class="l">0</td><td>1</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td><td class="r">0</td><td class="lr">1</td><td class="lr">0</td><td class="lr">1</td><td colspan="5" class="lr">Zm</td><td class="l">1</td><td>1</td><td>1</td><td>0</td><td>0</td><td class="r">1</td><td colspan="5" class="lr">Zn</td><td colspan="5" class="lr">Zda</td></tr></tbody></table></div><div class="encoding"><h4 class="encoding"/><a id="fmmla_z_zzz_s"/><p class="asm-code">FMMLA   <a href="#sa_zda" title="Third source and destination scalable vector register (field &quot;Zda&quot;)">&lt;Zda&gt;</a>.S, <a href="#sa_zn" title="First source scalable vector register (field &quot;Zn&quot;)">&lt;Zn&gt;</a>.S, <a href="#sa_zm" title="Second source scalable vector register (field &quot;Zm&quot;)">&lt;Zm&gt;</a>.S</p></div><p class="pseudocode">if !<a href="shared_pseudocode.html#impl-aarch64.HaveSVE.0" title="function: boolean HaveSVE()">HaveSVE</a>() || !<a href="shared_pseudocode.html#impl-aarch64.HaveSVEFP32MatMulExt.0" title="function: boolean HaveSVEFP32MatMulExt()">HaveSVEFP32MatMulExt</a>() then UNDEFINED;
constant integer esize = 32;
integer n = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zn);
integer m = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zm);
integer da = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zda);</p>
    <h3 class="classheading"><a id="iclass_64_elem"/>64-bit element<span style="font-size:smaller;"><br/>(FEAT_F64MM)
          </span></h3><div class="regdiagram-32"><table class="regdiagram"><thead><tr><td>31</td><td>30</td><td>29</td><td>28</td><td>27</td><td>26</td><td>25</td><td>24</td><td>23</td><td>22</td><td>21</td><td>20</td><td>19</td><td>18</td><td>17</td><td>16</td><td>15</td><td>14</td><td>13</td><td>12</td><td>11</td><td>10</td><td>9</td><td>8</td><td>7</td><td>6</td><td>5</td><td>4</td><td>3</td><td>2</td><td>1</td><td>0</td></tr></thead><tbody><tr class="firstrow"><td class="l">0</td><td>1</td><td>1</td><td>0</td><td>0</td><td>1</td><td>0</td><td class="r">0</td><td class="lr">1</td><td class="lr">1</td><td class="lr">1</td><td colspan="5" class="lr">Zm</td><td class="l">1</td><td>1</td><td>1</td><td>0</td><td>0</td><td class="r">1</td><td colspan="5" class="lr">Zn</td><td colspan="5" class="lr">Zda</td></tr></tbody></table></div><div class="encoding"><h4 class="encoding"/><a id="fmmla_z_zzz_d"/><p class="asm-code">FMMLA   <a href="#sa_zda" title="Third source and destination scalable vector register (field &quot;Zda&quot;)">&lt;Zda&gt;</a>.D, <a href="#sa_zn" title="First source scalable vector register (field &quot;Zn&quot;)">&lt;Zn&gt;</a>.D, <a href="#sa_zm" title="Second source scalable vector register (field &quot;Zm&quot;)">&lt;Zm&gt;</a>.D</p></div><p class="pseudocode">if !<a href="shared_pseudocode.html#impl-aarch64.HaveSVE.0" title="function: boolean HaveSVE()">HaveSVE</a>() || !<a href="shared_pseudocode.html#impl-aarch64.HaveSVEFP64MatMulExt.0" title="function: boolean HaveSVEFP64MatMulExt()">HaveSVEFP64MatMulExt</a>() then UNDEFINED;
constant integer esize = 64;
integer n = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zn);
integer m = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zm);
integer da = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zda);</p>
  <div class="encoding-notes"/><h3 class="explanations">Assembler Symbols</h3><div class="explanations"><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Zda&gt;</td><td><a id="sa_zda"/>
        
          <p class="aml">Is the name of the third source and destination scalable vector register, encoded in the "Zda" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Zn&gt;</td><td><a id="sa_zn"/>
        
          <p class="aml">Is the name of the first source scalable vector register, encoded in the "Zn" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Zm&gt;</td><td><a id="sa_zm"/>
        
          <p class="aml">Is the name of the second source scalable vector register, encoded in the "Zm" field.</p>
        
      </td></tr></table></div><div class="syntax-notes"/>
    <div class="ps"><a id="execute"/><h3 class="pseudocode">Operation</h3>
      <p class="pseudocode"><a href="shared_pseudocode.html#impl-aarch64.CheckNonStreamingSVEEnabled.0" title="function: CheckNonStreamingSVEEnabled()">CheckNonStreamingSVEEnabled</a>();
constant integer VL = <a href="shared_pseudocode.html#impl-aarch64.CurrentVL.read.none" title="accessor: integer CurrentVL">CurrentVL</a>;
constant integer PL = VL DIV 8;
if VL &lt; esize * 4 then UNDEFINED;
constant integer segments = VL DIV (4 * esize);
bits(VL) operand1 = <a href="shared_pseudocode.html#impl-aarch64.Z.read.2" title="accessor: bits(width) Z[integer n, integer width]">Z</a>[n, VL];
bits(VL) operand2 = <a href="shared_pseudocode.html#impl-aarch64.Z.read.2" title="accessor: bits(width) Z[integer n, integer width]">Z</a>[m, VL];
bits(VL) operand3 = <a href="shared_pseudocode.html#impl-aarch64.Z.read.2" title="accessor: bits(width) Z[integer n, integer width]">Z</a>[da, VL];
bits(VL) result = <a href="shared_pseudocode.html#impl-shared.Zeros.1" title="function: bits(N) Zeros(integer N)">Zeros</a>(VL);
bits(4*esize) op1, op2;
bits(4*esize) res, addend;

for s = 0 to segments-1
    op1    = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, s, 4*esize];
    op2    = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, s, 4*esize];
    addend = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, s, 4*esize];
    res    = <a href="shared_pseudocode.html#impl-shared.FPMatMulAdd.5" title="function: bits(N) FPMatMulAdd(bits(N) addend, bits(N) op1, bits(N) op2, integer esize, FPCRType fpcr)">FPMatMulAdd</a>(addend, op1, op2, esize, FPCR[]);
    <a href="shared_pseudocode.html#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, s, 4*esize] = res;

<a href="shared_pseudocode.html#impl-aarch64.Z.write.2" title="accessor: Z[integer n, integer width] = bits(width) value">Z</a>[da, VL] = result;</p>
    </div>
  <h3>Operational information</h3><p class="aml">
          This instruction might be immediately preceded in program order by a <span class="asm-code">MOVPRFX</span> instruction. The <span class="asm-code">MOVPRFX</span> instruction must conform to all of the following requirements, otherwise the behavior of the <span class="asm-code">MOVPRFX</span> and this instruction is <span class="arm-defined-word">unpredictable</span>:
        </p><ul><li>The <span class="asm-code">MOVPRFX</span> instruction must be unpredicated.</li><li>The <span class="asm-code">MOVPRFX</span> instruction must specify the same destination register as this instruction.</li><li>The destination register must not refer to architectural register state referenced by any other source operand register of this instruction.</li></ul><hr/><table style="margin: 0 auto;"><tr><td><div class="topbar"><a href="index.html">Base Instructions</a></div></td><td><div class="topbar"><a href="fpsimdindex.html">SIMD&amp;FP Instructions</a></div></td><td><div class="topbar"><a href="sveindex.html">SVE Instructions</a></div></td><td><div class="topbar"><a href="mortlachindex.html">SME Instructions</a></div></td><td><div class="topbar"><a href="encodingindex.html">Index by Encoding</a></div></td><td><div class="topbar"><a href="shared_pseudocode.html">Shared Pseudocode</a></div></td><td><div class="topbar"><a href="notice.html">Proprietary Notice</a></div></td></tr></table><p class="versions">
      Internal version only: isa v33.62, AdvSIMD v29.12, pseudocode v2023-03_rel, sve v2023-03_rc3b
      ; Build timestamp: 2023-03-31T11:36
    </p><p class="copyconf">
      Copyright © 2010-2023 Arm Limited or its affiliates. All rights reserved.
      This document is Non-Confidential.
    </p></body></html>
